Import the necessary libraries
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)
Read in the files
# Transcript database (UCSC hg38 knownGene) used later for peak annotation.
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

# Load the two merged 542_L1MA1 peak sets: the "cores1000" regions and the
# TE regions.
peak_core <- readPeakFile(
  "D:/TEs_CoTEs_pipline/data/V2-TEs/non_olap/cores1000/542_L1MA1_Merged.bed"
)
peak_TE <- readPeakFile(
  "D:/TEs_CoTEs_pipline/data/V2-TEs/non_olap/TEs/542_L1MA1_Merged.bed"
)
#plotAnnoPie(peakAnno)
#peakAnno = as.data.frame(peakAnno)
Annotate the genomic regions
# Annotate the TE peaks against txdb; the TSS region is set to 3 kb on either
# side, and gene-level annotation is taken from org.Hs.eg.db.
peakAnno_TE <- annotatePeak(
  peak_TE,
  tssRegion = c(-3000, 3000),
  TxDb = txdb,
  annoDb = "org.Hs.eg.db"
)
## >> preparing features information... 2023-11-23 3:24:59 PM
## >> identifying nearest features... 2023-11-23 3:25:02 PM
## >> calculating distance from peak to TSS... 2023-11-23 3:25:04 PM
## >> assigning genomic annotation... 2023-11-23 3:25:04 PM
## >> adding gene annotation... 2023-11-23 3:25:49 PM
## >> assigning chromosome lengths 2023-11-23 3:25:50 PM
## >> done... 2023-11-23 3:25:50 PM
# Annotate the core peaks with the same settings used for the TE peaks:
# TSS region of 3 kb on either side, txdb transcripts, org.Hs.eg.db genes.
peakAnno_core <- annotatePeak(
  peak_core,
  tssRegion = c(-3000, 3000),
  TxDb = txdb,
  annoDb = "org.Hs.eg.db"
)
## >> preparing features information... 2023-11-23 3:25:50 PM
## >> identifying nearest features... 2023-11-23 3:25:50 PM
## >> calculating distance from peak to TSS... 2023-11-23 3:25:50 PM
## >> assigning genomic annotation... 2023-11-23 3:25:50 PM
## >> adding gene annotation... 2023-11-23 3:25:56 PM
## >> assigning chromosome lengths 2023-11-23 3:25:56 PM
## >> done... 2023-11-23 3:25:56 PM
Plot the proportions of genomic regions
# Pie chart of the annotation categories for the TE peaks.
plotAnnoPie(peakAnno_TE)

# Pie chart of the annotation categories for the core peaks.
plotAnnoPie(peakAnno_core)

# Flatten the core-peak annotation into a data frame and print it.
peakAnno <- as.data.frame(peakAnno_core)
peakAnno
## seqnames start end width strand
## 1 chr3 162617689 163586686 968998 *
## 2 chrX 66132624 66846941 714318 *
## 3 chrX 66846981 67382602 535622 *
## 4 chrX 78234440 79179059 944620 *
## 5 chrX 84299068 84656709 357642 *
## 6 chrX 86101265 86780526 679262 *
## 7 chrX 92416130 93562722 1146593 *
## 8 chrX 94305762 95246445 940684 *
## 9 chrX 95310191 96209102 898912 *
## 10 chrX 112938020 114131076 1193057 *
## 11 chrX 120505949 121496670 990722 *
## 12 chrX 121550945 122531150 980206 *
## 13 chrX 126492302 127136271 643970 *
## 14 chrX 141053490 142411052 1357563 *
## 15 chrX 142839916 143969656 1129741 *
## 16 chrX 143970242 144903835 933594 *
## 17 chrX 101392657 101918323 525667 *
## 18 chr4 43113058 43804898 691841 *
## 19 chrX 33642172 33940271 298100 *
## 20 chr21 22868644 23233124 364481 *
## 21 chrX 99251366 99779440 528075 *
## 22 chrX 87262284 87886862 624579 *
## 23 chrX 106073913 106306851 232939 *
## 24 chrX 98321649 98539954 218306 *
## 25 chr1 187114160 187258884 144725 *
## 26 chr1 211447197 211464618 17422 *
## 27 chrX 140234740 140403394 168655 *
## 28 chr13 80670502 80680494 9993 *
## 29 chr2 80709109 80735002 25894 *
## 30 chr5 43460381 43473237 12857 *
## 31 chr5 100279730 100292011 12282 *
## 32 chr5 159676162 159693179 17018 *
## 33 chr6_GL000253v2_alt 1587720 1596898 9179 *
## 34 chr1 196758085 196849258 91174 *
## 35 chr10 92056318 92061948 5631 *
## 36 chr14 50681907 50696101 14195 *
## 37 chr15 33267073 33276602 9530 *
## 38 chr17 58256426 58263967 7542 *
## 39 chr19 45409819 45413274 3456 *
## 40 chr4 111131216 111138819 7604 *
## 41 chr5 89652284 89684827 32544 *
## 42 chr5 144629900 144663592 33693 *
## 43 chr6 30277496 30286662 9167 *
## 44 chr6 74548192 74593690 45499 *
## 45 chr6 162561378 162571409 10032 *
## 46 chr6_GL000251v2_alt 1757187 1766350 9164 *
## 47 chr6_GL000252v2_alt 1533356 1542520 9165 *
## 48 chr6_GL000255v2_alt 1532591 1541752 9162 *
## 49 chr6_GL000256v2_alt 1575958 1585132 9175 *
## 50 chrX 53765579 53782542 16964 *
## annotation geneChr
## 1 Promoter (<=1kb) 3
## 2 Promoter (<=1kb) 23
## 3 Exon (ENST00000441055.1/ENST00000441055.1, exon 1 of 1) 23
## 4 Promoter (<=1kb) 23
## 5 Promoter (<=1kb) 23
## 6 Promoter (<=1kb) 23
## 7 3' UTR 23
## 8 Promoter (<=1kb) 23
## 9 Exon (ENST00000604390.1/ENST00000604390.1, exon 1 of 1) 23
## 10 Promoter (<=1kb) 23
## 11 Promoter (<=1kb) 23
## 12 Promoter (<=1kb) 23
## 13 Promoter (<=1kb) 23
## 14 Promoter (<=1kb) 23
## 15 Promoter (<=1kb) 23
## 16 Exon (ENST00000441721.1/ENST00000441721.1, exon 1 of 1) 23
## 17 Promoter (<=1kb) 23
## 18 Promoter (<=1kb) 4
## 19 Promoter (<=1kb) 23
## 20 Promoter (<=1kb) 21
## 21 Exon (ENST00000605078.1/ENST00000605078.1, exon 1 of 1) 23
## 22 Promoter (<=1kb) 23
## 23 Promoter (<=1kb) 23
## 24 Exon (ENST00000395772.3/ENST00000395772.3, exon 1 of 1) 23
## 25 Promoter (<=1kb) 1
## 26 Distal Intergenic 1
## 27 Exon (ENST00000420490.1/ENST00000420490.1, exon 1 of 2) 23
## 28 Distal Intergenic 13
## 29 Intron (ENST00000450290.1/ENST00000450290.1, intron 3 of 4) 2
## 30 Intron (ENST00000500337.6/64417, intron 3 of 4) 5
## 31 Distal Intergenic 5
## 32 Intron (ENST00000636819.1/105377684, intron 4 of 6) 5
## 33 Intron (ENST00000437311.1/414778, intron 1 of 4) 96
## 34 Promoter (<=1kb) 1
## 35 Intron (ENST00000412050.8/22849, intron 9 of 9) 10
## 36 Distal Intergenic 14
## 37 Distal Intergenic 15
## 38 Intron (ENST00000582328.5/4025, intron 9 of 12) 17
## 39 Promoter (1-2kb) 19
## 40 Intron (ENST00000681682.1/ENST00000681682.1, intron 3 of 4) 4
## 41 Exon (ENST00000503691.1/105379076, exon 2 of 2) 5
## 42 Distal Intergenic 5
## 43 Exon (ENST00000624252.1/ENST00000624252.1, exon 1 of 1) 6
## 44 Promoter (1-2kb) 6
## 45 Exon (ENST00000441609.1/ENST00000441609.1, exon 1 of 1) 6
## 46 Intron (ENST00000455160.1/414778, intron 1 of 4) 94
## 47 Intron (ENST00000426984.1/414778, intron 1 of 4) 95
## 48 Intron (ENST00000439480.1/414778, intron 1 of 4) 98
## 49 Intron (ENST00000448760.1/414778, intron 1 of 4) 99
## 50 Exon (ENST00000458382.2/ENST00000458382.2, exon 1 of 1) 23
## geneStart geneEnd geneLength geneStrand geneId transcriptId
## 1 163109152 163303309 194158 2 647107 ENST00000660789.1
## 2 66595637 66639078 43442 2 60401 ENST00000374719.8
## 3 67544021 67730619 186599 1 367 ENST00000374690.9
## 4 78901194 78901249 56 2 100422932 ENST00000637458.1
## 5 84317874 84502453 184580 2 139324 ENST00000297977.9
## 6 86148451 86832602 684152 1 117154 ENST00000373125.9
## 7 93670930 93673578 2649 2 4675 ENST00000373079.4
## 8 95063141 95063226 86 2 100313772 ENST00000408260.1
## 9 96337236 96337912 677 2 643486 ENST00000605735.1
## 10 113616300 114059121 442822 2 105463123 ENST00000468762.3
## 11 121370972 121371053 82 1 100500869 ENST00000584290.1
## 12 122422006 122478493 56488 1 101928359 ENST00000665977.2
## 13 126819729 126821786 2058 1 100130613 ENST00000371125.4
## 14 141934496 142740581 806086 1 105373345 ENST00000664519.1
## 15 143622790 143634503 11714 2 139065 ENST00000596188.2
## 16 143884071 143885255 1185 1 389898 ENST00000618570.1
## 17 101418287 101533459 115173 1 100131755 ENST00000433011.6
## 18 43457527 43492543 35017 2 105374436 ENST00000508563.1
## 19 33726337 34078228 351892 1 105373153 ENST00000664553.1
## 20 23079284 23079392 109 1 102466967 ENST00000619419.1
## 21 100350662 100406515 55854 2 57526 ENST00000636150.1
## 22 87723928 87728679 4752 2 124905233 ENST00000691872.1
## 23 106168305 106208955 40651 1 139221 ENST00000337685.6
## 24 97533173 97617259 84087 2 10824 ENST00000445414.1
## 25 187228587 187330505 101919 1 105371655 ENST00000642737.1
## 26 211476522 211492031 15510 2 343035 ENST00000484910.1
## 27 140216035 140216804 770 2 105373343 ENST00000458577.1
## 28 80335976 80341126 5151 2 10253 ENST00000377104.4
## 29 80589339 80608489 19151 1 1496 ENST00000467892.1
## 30 43444252 43483836 39585 2 64417 ENST00000500337.6
## 31 100375700 100381398 5699 2 100133050 ENST00000621479.1
## 32 159698589 159761071 62483 2 105377687 ENST00000523311.3
## 33 1569852 1572568 2717 1 3139 ENST00000475416.1
## 34 196774813 196790022 15210 1 10878 ENST00000471440.6
## 35 91980520 91982415 1896 1 9044 ENST00000476401.1
## 36 50633580 50668306 34727 2 60485 ENST00000324679.5
## 37 33242720 33247621 4902 2 100652857 ENST00000529623.5
## 38 58252182 58268518 16337 1 4025 ENST00000389576.4
## 39 45413436 45414477 1042 2 2067 ENST00000592410.5
## 40 110860582 110860647 66 2 100126354 ENST00000401142.1
## 41 89581209 89677701 96493 1 105379076 ENST00000503691.1
## 42 144205277 144485686 280410 1 57528 ENST00000507359.3
## 43 30286703 30296978 10276 2 414777 ENST00000659563.1
## 44 74594179 74595213 1035 2 105377858 ENST00000436672.1
## 45 162367282 162555620 188339 2 105369171 ENST00000674259.1
## 46 1739382 1742102 2721 1 3139 ENST00000482589.1
## 47 1515484 1518204 2721 1 3139 ENST00000492699.1
## 48 1514710 1517430 2721 1 3139 ENST00000475567.1
## 49 1558095 1560815 2721 1 3139 ENST00000470222.1
## 50 53680084 53686752 6669 2 10075 ENST00000704102.1
## distanceToTSS ENSEMBL SYMBOL
## 1 0 ENSG00000241369 LINC01192
## 2 0 ENSG00000131080 EDA2R
## 3 -161419 ENSG00000169083 AR
## 4 0 ENSG00000283622 MIR4328
## 5 0 ENSG00000165259 HDX
## 6 0 ENSG00000126733 DACH2
## 7 110856 ENSG00000186310 NAP1L3
## 8 0 ENSG00000221187 MIR548M
## 9 128810 ENSG00000271209 BRDTP1
## 10 0 <NA> XACT
## 11 0 ENSG00000265456 MIR3672
## 12 0 <NA> LOC101928359
## 13 0 ENSG00000183631 PRR32
## 14 0 <NA> LOC105373345
## 15 0 ENSG00000179542 SLITRK4
## 16 86171 ENSG00000276380 UBE2NL
## 17 0 ENSG00000196440 ARMCX4
## 18 0 ENSG00000248143 LINC02383
## 19 0 <NA> LOC105373153
## 20 0 ENSG00000275469 MIR6130
## 21 627075 ENSG00000165194 PCDH19
## 22 0 ENSG00000289575 LOC124905233
## 23 0 ENSG00000157502 PWWP3B
## 24 -704390 ENSG00000236256 DIAPH2-AS1
## 25 0 <NA> LOC105371655
## 26 27413 ENSG00000198570 RD3
## 27 -17936 ENSG00000231110 LOC105373343
## 28 -329376 ENSG00000136158 SPRY2
## 29 119770 ENSG00000066032 CTNNA2
## 30 10599 ENSG00000151881 TMEM267
## 31 89387 <NA> GUSBP19
## 32 67892 <NA> LOC105377687
## 33 17868 ENSG00000291097 HLA-L
## 34 0 ENSG00000116785 CFHR3
## 35 75798 ENSG00000095564 BTAF1
## 36 -13601 ENSG00000151748 SAV1
## 37 -19452 <NA> TMCO5B
## 38 4244 ENSG00000167419 LPO
## 39 1203 ENSG00000012061 ERCC1
## 40 -270569 ENSG00000215961 MIR297
## 41 71075 <NA> LINC02161
## 42 424623 ENSG00000183775 KCTD16
## 43 10316 ENSG00000231074 HCG18
## 44 1523 <NA> LOC105377858
## 45 -5758 <NA> LOC105369171
## 46 17805 ENSG00000291097 HLA-L
## 47 17872 ENSG00000291097 HLA-L
## 48 17881 ENSG00000291097 HLA-L
## 49 17863 ENSG00000291097 HLA-L
## 50 -78827 ENSG00000086758 HUWE1
## GENENAME
## 1 long intergenic non-protein coding RNA 1192
## 2 ectodysplasin A2 receptor
## 3 androgen receptor
## 4 microRNA 4328
## 5 highly divergent homeobox
## 6 dachshund family transcription factor 2
## 7 nucleosome assembly protein 1 like 3
## 8 microRNA 548m
## 9 bromodomain testis associated pseudogene 1
## 10 X active specific transcript
## 11 microRNA 3672
## 12 uncharacterized LOC101928359
## 13 proline rich 32
## 14 uncharacterized LOC105373345
## 15 SLIT and NTRK like family member 4
## 16 ubiquitin conjugating enzyme E2 N like (gene/pseudogene)
## 17 armadillo repeat containing X-linked 4
## 18 long intergenic non-protein coding RNA 2383
## 19 uncharacterized LOC105373153
## 20 microRNA 6130
## 21 protocadherin 19
## 22 uncharacterized LOC124905233
## 23 PWWP domain containing 3B
## 24 DIAPH2 antisense RNA 1
## 25 uncharacterized LOC105371655
## 26 RD3 regulator of GUCY2D
## 27 uncharacterized LOC105373343
## 28 sprouty RTK signaling antagonist 2
## 29 catenin alpha 2
## 30 transmembrane protein 267
## 31 GUSB pseudogene 19
## 32 uncharacterized LOC105377687
## 33 major histocompatibility complex, class I, L (pseudogene)
## 34 complement factor H related 3
## 35 B-TFIID TATA-box binding protein associated factor 1
## 36 salvador family WW domain containing protein 1
## 37 transmembrane and coiled-coil domains 5B (pseudogene)
## 38 lactoperoxidase
## 39 ERCC excision repair 1, endonuclease non-catalytic subunit
## 40 microRNA 297
## 41 long intergenic non-protein coding RNA 2161
## 42 potassium channel tetramerization domain containing 16
## 43 HLA complex group 18
## 44 uncharacterized LOC105377858
## 45 uncharacterized LOC105369171
## 46 major histocompatibility complex, class I, L (pseudogene)
## 47 major histocompatibility complex, class I, L (pseudogene)
## 48 major histocompatibility complex, class I, L (pseudogene)
## 49 major histocompatibility complex, class I, L (pseudogene)
## 50 HECT, UBA and WWE domain containing E3 ubiquitin protein ligase 1
Since chrX stands out as the significant chromosome, we next look at the
functions of the genes involved
library(gprofiler2)

# Keep only the annotated core peaks that lie on chrX.
filtered_df <- subset(peakAnno, seqnames == "chrX")

# Gene symbols assigned to those peaks. Missing and repeated symbols are
# removed so that each gene is submitted to g:Profiler exactly once.
genes_list <- filtered_df$SYMBOL
genes_list <- unique(genes_list[!is.na(genes_list)])

# Functional enrichment of the chrX genes (human, significant terms only,
# threshold 0.05). The query is skipped when there are no symbols to submit.
gostres <- NULL
if (length(genes_list) > 0L) {
  gostres <- gost(
    query = genes_list,
    organism = "hsapiens",
    significant = TRUE,
    user_threshold = 0.05
  )
}

# gost() yields NULL when nothing passes the threshold; gostplot() cannot
# handle NULL, so only plot when there is a result to show.
if (is.null(gostres)) {
  message("No chrX genes or no significant enrichment terms; skipping plot.")
} else {
  gostplot(gostres, capped = TRUE, interactive = TRUE)
}